import sqlite3
import os
def CreateDB():
    """Create the ``word`` table in the SQLite database file ``test.db``.

    The database file is created in the current working directory if it
    does not exist yet.  The table has three columns: ``id`` (primary
    key), ``words`` (not null) and ``counts``.

    The statement uses ``if not exists`` so that calling this function
    on an already initialised database is a no-op instead of an
    ``sqlite3.OperationalError``.
    """
    stable = """
    create table if not exists word
    (
        id int(10) primary key,
        words char(11) not null,
        counts char(11)
    )
    """
    hcon = sqlite3.connect('test.db')
    try:
        hcur = hcon.cursor()
        try:
            hcur.execute(stable)
            hcon.commit()
        finally:
            # Release the cursor even when the DDL statement fails.
            hcur.close()
    finally:
        # Always release the file handle held by the connection.
        hcon.close()

def AddInfo(hcon, hcur, id, words, counts):
    """Insert one row into the ``word`` table and commit it.

    The insert is best-effort: if SQLite rejects the row (for example
    because ``id`` already exists), the transaction is rolled back and
    the error is not propagated.  Only ``sqlite3.Error`` is handled, so
    unrelated problems such as ``KeyboardInterrupt`` are no longer
    swallowed.

    Args:
        hcon: Open connection, used for commit / rollback.
        hcur: Cursor on ``hcon`` used to run the insert.
        id: Value for the ``id`` column.
        words: Value for the ``words`` column.
        counts: Value for the ``counts`` column.

    Returns:
        True if the row was inserted and committed, False if the insert
        failed and was rolled back.
    """
    sql = "insert into word(id,words,counts) values(?,?,?)"
    try:
        hcur.execute(sql, (id, words, counts))
        hcon.commit()
    except sqlite3.Error:
        hcon.rollback()
        return False
    return True

def getText(path=None):
    """Read a UTF-8 text file, lower-case it and blank out punctuation.

    Args:
        path: File to read.  When omitted, the Windows-style relative
            path ``Lab4\\mansea.txt`` is used.

    Returns:
        The file content in lower case, with every listed punctuation
        character replaced by a single space.
    """
    if path is None:
        # Raw string: same value as before, without the invalid "\m" escape.
        path = r'Lab4\mansea.txt'
    # The encoding name was misspelled "uft-8", which raised LookupError.
    with open(path, 'r', encoding='utf-8') as fh:
        txt = fh.read().lower()
    # The straight double quote is listed explicitly; the original literal
    # was two adjacent strings, so that character was silently left out.
    punctuation = "~@#$%^&*()_-+=<>?/,.:;{}[]|'\"”"
    # One translation pass instead of one replace() call per character.
    return txt.translate(str.maketrans(punctuation, ' ' * len(punctuation)))

# Load the cleaned text and split it into words on whitespace.
hamletTxt = getText()
words = hamletTxt.split()

# Open the database, creating the table first when the file is new.
if not os.path.exists('test.db'):
    CreateDB()
hcon = sqlite3.connect('test.db')
hcur = hcon.cursor()

# Tally occurrences per word; sumcount is the total number of words.
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
sumcount = len(words)

# (word, count) pairs, most frequent first; ties keep first-seen order.
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

# Print the top ten. Slicing avoids the IndexError that range(10) raised
# when the text has fewer than ten distinct words.
for word, count in items[:10]:
    print('{0:<10}{1:>5}'.format(word, count))

# Build the report lines that will be written to the text file.
lines = [
    '单词种类：' + str(len(items)) + '\n',
    '单词总数：' + str(sumcount) + '\n',
    '词频排序如下:\n',
    'word\tcounts\n',
]

try:
    for row_id, (word, count) in enumerate(items, start=1):
        x = str(word)
        y = str(count)
        # Store the row in the database (the count is stored as text).
        AddInfo(hcon, hcur, row_id, x, y)
        lines.append('\t'.join([x, y]) + '\n')

    print('\n统计完成！\n')

    # Show the database content.
    hcur.execute("select * from word")
    result = hcur.fetchall()
    print(result)
finally:
    # Release the cursor and connection even if a step above failed.
    hcur.close()
    hcon.close()

# Write the statistics to the text file. The raw string keeps the same
# path value without the invalid "\词" escape; the explicit encoding keeps
# the non-ASCII text writable whatever the locale default is.
with open(r'Lab4\词频统计结果.txt', "w", encoding='utf-8') as outfile:
    outfile.writelines(lines)
